* Entry point: build the cleaned SAHIE county files, then run the ACA regressions.
* The program is dropped first so the do-file can be re-run in the same Stata
* session without stopping on an "already defined" error.
capture program drop run_aca
program run_aca
	prep_sahie
	aca_analysis
end

********* Import and save county-level data on the rate of uninsured population
* Data source: Small Area Health Insurance Estimates Program of US Census Bureau (SAHIE)
* prep_sahie: import the annual SAHIE CSV extracts (2008-2017), stack them, and write
*   (1) a county-by-year file of insurance shares for the Medicaid-expansion sample, and
*   (2) a county-level file holding the 2013 uninsured share (the treatment intensity).
* Reads  : ${raw_data}/SAHIE/sahie_YYYY.csv
* Writes : ${intermediate_data}/SAHIE/sahie_YYYY.dta
*          ${intermediate_data}/ACA/sahie_cleaned.dta
*          ${intermediate_data}/ACA/sahie_cleaned_share_uninsured2013.dta
* Requires gtools (gegen, gisid). Takes no arguments; replaces the data in memory.
capture program drop prep_sahie
program prep_sahie

	* Convert every annual CSV to .dta first. Variable names are read from row 80
	* of each file, as in the raw SAHIE layout used here.
	forvalues yr = 2008/2017 {
		import delimited "${raw_data}/SAHIE/sahie_`yr'.csv", varnames(80) rowrange(80) clear
		save "${intermediate_data}/SAHIE/sahie_`yr'.dta", replace
	}

	* Stack the annual files in chronological order, starting from an empty dataset
	clear
	forvalues yr = 2008/2017 {
		append using "${intermediate_data}/SAHIE/sahie_`yr'.dta"
	}

	* Keep county-level observations (countyfips 0 is not a county row) for the
	* all-groups cell of every demographic category (== everyone age <65)
	keep if countyfips != 0
	keep if iprcat == 0 & agecat == 0 & racecat == 0 & sexcat == 0
	rename nipr n_pop_cty_yr

	* Build the 5-character county code: 2-digit state + 3-digit county, zero padded
	gen county = string(countyfips, "%03.0f")
	gen state = string(statefips, "%02.0f")
	gegen cfips = concat(state county)

	* Share uninsured and share insured in the under-65 population
	* (pctelig is a percentage, hence the division by 100)
	gen share_uninsured = pctelig/100
	gen share_insured = 1 - share_uninsured

	* Treatment intensity: each county's uninsured share in 2013, stored as a
	* county-level variable named share_uninsured2013
	tempfile treatment
	preserve
		keep if year == 2013
		keep cfips share_uninsured
		rename share_uninsured share_uninsured2013
		* One row per county is required for the m:1 merge below
		gisid cfips
		save "`treatment'", replace
	restore

	* Counties without a 2013 record are dropped by keep(match).
	* The tempfile path is quoted so temp directories containing spaces work.
	merge m:1 cfips using "`treatment'", keep(match) nogen
	drop if missing(n_pop_cty_yr)

	* Keep only states that expanded Medicaid in 2014 or 2015.
	* Excludes 5 states that had comprehensive early
	* expansions from 2010 to 2014:
	* Delaware, DC, Massachusetts, New York, Vermont
	local fips_2014 04 05 06 08 09 15 17 19 21 24 26 27 32 34 35 38 39 41 44 53 54
	local fips_2015 33 42 18

	gen byte in_expansion_sample = 0
	foreach st of local fips_2014 {
		replace in_expansion_sample = 1 if state == "`st'"
	}
	foreach st of local fips_2015 {
		replace in_expansion_sample = 1 if state == "`st'"
	}
	keep if in_expansion_sample == 1

	* County-by-year output: fixed column set and order, unique on cfips year
	local outvars state_name state county_name cfips year n_pop_cty_yr share_insured share_uninsured2013
	keep `outvars'
	order `outvars'
	gisid cfips year
	sort cfips year

	save "${intermediate_data}/ACA/sahie_cleaned.dta", replace

	* County-level output: one row per county with the 2013 uninsured share
	keep cfips share_uninsured2013
	duplicates drop
	save "${intermediate_data}/ACA/sahie_cleaned_share_uninsured2013.dta", replace
end

********* Regression analysis 

* _aca_save_estimates: helper for aca_analysis. Writes the active estimation
* results (one row per parameter, via parmest) to a .dta file, together with
* summary statistics and labels describing the specification.
* All options are required:
*   depvar()     variable summarised as the dependent variable; also stored as the outcome label
*   statvar()    variable summarised as the regressor of interest
*   window()     condition combined (AND) with e(sample) for both summaries
*   indeplabel() text stored in variable indepvar
*   spec()       text stored in variable spec
*   agerange()   text stored in variable agerange
*   saving()     path of the .dta file to write (replaced if it exists)
* The data in memory are left unchanged; estimation results must be active on entry.
capture program drop _aca_save_estimates
program _aca_save_estimates
	syntax , depvar(string) statvar(string) window(string) indeplabel(string) ///
			 spec(string) agerange(string) saving(string)

	* Mean and SD of the dependent variable within the estimation sample
	qui sum `depvar' if e(sample) & (`window')
	loc meandepv = `r(mean)'
	loc sddepv = `r(sd)'

	* Mean and SD of the regressor of interest within the estimation sample
	qui sum `statvar' if e(sample) & (`window')
	loc meanindepv = `r(mean)'
	loc sdindepv = `r(sd)'

	preserve
		* Replace the data in memory with the parameter table of the last model
		parmest, norestore
		gen outcome = "`depvar'"
		gen indepvar = "`indeplabel'"
		gen spec = "`spec'"
		gen agerange = "`agerange'"

		gen mean_depvar = `meandepv'
		gen sd_depvar = `sddepv'
		gen mean_indepvar = `meanindepv'
		gen sd_indepvar = `sdindepv'

		* Counts are stored as long: the default float type cannot hold
		* integers above 16,777,216 exactly
		gen long obs = `e(N)'
		* NOTE(review): dof is the degrees-of-freedom column produced by parmest;
		* confirm it equals the number of unique counties before relying on the name
		ren dof unique_cnty
		gen long N_UR = `e(N)'

		save "`saving'", replace
	restore
end

* aca_analysis: merge the physician panel with the cleaned SAHIE county data,
* estimate the first-stage event study, and for each outcome estimate an event
* study, a reduced form, a first stage (first outcome only) and two IV models.
* Reads  : ${clean_data}/panel_physicians_clean.dta
*          ${intermediate_data}/ACA/sahie_cleaned_share_uninsured2013.dta
*          ${intermediate_data}/ACA/sahie_cleaned.dta
* Writes : $mypath/intermediate_csv/govtpolicy02-ACA_eventstudy_FS.csv
*          $mypath/intermediate_csv/govtpolicy02-ACA_es_fs_ddiv.csv
* Requires reghdfe, ivreghdfe, parmest, gtools, drbest_docinc and drbcount.
* Takes no arguments; replaces the data in memory.
capture program drop aca_analysis
program aca_analysis

	* Load the panel. The first merge keeps only people in counties that have a
	* 2013 uninsured share; the second adds county-by-year variables and keeps
	* person-years with no county-year match (master copies of shared variables win).
	use "${clean_data}/panel_physicians_clean.dta", clear
	merge m:1 cfips using "${intermediate_data}/ACA/sahie_cleaned_share_uninsured2013.dta", keep(match) nogen
	merge m:1 cfips year using "${intermediate_data}/ACA/sahie_cleaned.dta", keep(master match) nogen

	* Construct additional outcome variables of interest.
	* Size indicators are missing whenever einsize is missing.
	gen einsize1=(einsize==1) if !missing(einsize)
	gen einsize1to2=(einsize<3) if !missing(einsize)
	gen einsize1to5=(einsize<6) if !missing(einsize)
	gen einsize1to28=(einsize<29) if !missing(einsize)

	gen logpftloss=log(pftloss)
	gen logtotal_money_income=log(total_money_income)
	gen logw2wgs=log(w2wgs)

	* The list ends without a continuation marker so the command cannot absorb
	* whatever line happens to follow it
	keep personid id stfips cfips year age logptotinc samecmsspec_share top1 ///
			 aginc logaginc pftloss logpftloss ///
			 profinc w2wgs logw2wgs w2wgs_salary total_money_income logtotal_money_income ///
			 schedule_c_flag schedule_e_flag schd_se ///
			 retired_1099ssa retired_w2wgsal ///
			 se_schedules se_acs govt_acs wkh wkh_imputed ///
			 einsize* share_insured ///
			 share_uninsured2013 n_pop_cty_yr

	* Period indicators and their interactions with the 2013 uninsured share
	gen post2013 = year > 2013
	gen post2010 = year > 2010
	gen post2010pre2014 = inrange(year, 2011, 2013)

	gen iv=post2013*share_uninsured2013
	gen post2010pre2014shrui=post2010pre2014*share_uninsured2013
	gen post2013shrui=post2013*share_uninsured2013

	* The panel must be unique on person-year
	gisid personid year
	gsort personid year

	* Numeric county identifier, used as the cluster variable in ivreghdfe
	encode cfips, gen(cfips_num)

	* First stage event study plot: 2013 is the omitted year
	reghdfe share_insured i(2005/2012 2014/2017).year#c.share_uninsured2013 if inrange(year, 2005, 2017) & inrange(age, 40,70), a(cfips year age) cluster(cfips)

	preserve
		parmest, norestore
		* Keep only the year-by-share interaction terms
		keep if regexm(parm, "#")
		gen outcome = "share_insured"
		gen indepvar = "share_uninsured2013"
		gen spec = "eventstudy_FS"
		gen agerange = "all"
		gen long obs = `e(N)'
		ren dof unique_cnty
		gen long N_UR = `e(N)'

		keep outcome indepvar parm estimate min95 max95 N_UR
		drbest_docinc estimate min95 max95
		order N_UR, last
		export delimited using "$mypath/intermediate_csv/govtpolicy02-ACA_eventstudy_FS.csv", replace dataf delim(tab)
	restore

	* Summary-statistic windows passed to the helper
	loc prewin "inrange(year, 2005, 2010)"
	* NOTE(review): for the ddivdrop model the window below coincides with the
	* estimation sample, so those summaries cover pre and post years, unlike the
	* other specifications, which summarise 2005-2010 only. Kept as in the
	* original; confirm this is intended.
	loc dropwin "(inrange(year,2005,2010) | inrange(year,2014,2017))"

	loc i = 0

	* Regressions

	foreach outcome in logptotinc schd_se retired_1099ssa {

		* Set locals
		loc ++i
		if "`outcome'" == "retired_1099ssa" 		loc agerng "44,70"
		// All individuals who turn 56 to 70 at some point in the sample
		else loc agerng "40,55"

		* Event study: 2010 is the omitted year

		reghdfe `outcome' i(2005/2009 2011/2017).year#c.share_uninsured2013 if inrange(year, 2005, 2017) & inrange(age,`agerng'), a(cfips year age) cluster(cfips)

		tempfile es`i'
		_aca_save_estimates, depvar(`outcome') statvar(share_uninsured2013) window(`prewin') ///
			indeplabel("yearshrui") spec("event_study") agerange("`agerng'") saving("`es`i''")

		* DDIV

			// Reduced Form:

			reghdfe `outcome' 1.post2010pre2014#c.share_uninsured2013  1.post2013#c.share_uninsured2013 if inrange(year, 2005, 2017) & inrange(age,`agerng'), a(cfips year age) cluster(cfips)

			tempfile rf`i'
			_aca_save_estimates, depvar(`outcome') statvar(share_uninsured2013) window(`prewin') ///
				indeplabel("post2010pre2014shrui post2013shrui") spec("rf") agerange("`agerng'") saving("`rf`i''")

			// First stage: estimated once, on the age range of the first outcome

			if `i' == 1 {
				reghdfe share_insured 1.post2010pre2014#c.share_uninsured2013  1.post2013#c.share_uninsured2013 if inrange(year, 2005, 2017) & inrange(age,`agerng'), a(cfips year age) cluster(cfips)

				tempfile fs`i'
				_aca_save_estimates, depvar(share_insured) statvar(share_uninsured2013) window(`prewin') ///
					indeplabel("post2010pre2014shrui post2013shrui") spec("fs") agerange("`agerng'") saving("`fs`i''")
			}

			// DDIV:

			ivreghdfe `outcome' (share_insured=iv) if inrange(year, 2005, 2017) & inrange(age,`agerng'), a(cfips year age) cluster(cfips_num)

			tempfile ddiv`i'
			_aca_save_estimates, depvar(`outcome') statvar(share_insured) window(`prewin') ///
				indeplabel("share_insured=iv") spec("ddiv") agerange("`agerng'") saving("`ddiv`i''")

			// DDIV: drop years 2011-2013 inclusive

			ivreghdfe `outcome' (share_insured=iv) if (inrange(year,2005,2010) | inrange(year,2014,2017)) & inrange(age,`agerng'), a(cfips year age) cluster(cfips_num)

			tempfile ddivdrop`i'
			_aca_save_estimates, depvar(`outcome') statvar(share_insured) window(`dropwin') ///
				indeplabel("share_insured=iv") spec("ddivdrop") agerange("`agerng'") saving("`ddivdrop`i''")

	}

	* Stack all stored results: the single first stage, then every outcome in turn.
	* Paths are quoted so temp directories containing spaces work.
	use "`fs1'", clear
	forvalues s = 1/`i' {
		append using "`es`s''"
		append using "`rf`s''"
		append using "`ddiv`s''"
		append using "`ddivdrop`s''"
	}

	* NOTE(review): agerange is reformatted here but is not in the keep list
	* below, so it never reaches the exported file; confirm whether it should.
	replace agerange = subinstr(agerange,",","-",.)

	keep outcome indepvar mean_depvar sd_depvar mean_indepvar sd_indepvar obs unique_cnty ///
	spec parm estimate stderr min95 max95 N_UR

	drbest_docinc estimate stderr min95 max95 mean_depvar sd_depvar mean_indepvar sd_indepvar
	drbcount obs unique_cnty, replace
	order N_UR, last
	export delimited using "$mypath/intermediate_csv/govtpolicy02-ACA_es_fs_ddiv.csv", replace dataf delim(tab)

end

* Execute the full pipeline defined above (prep_sahie, then aca_analysis)
run_aca
